device-dm: Use SIGHUP before SIGKILL
author Keir Fraser <keir.fraser@citrix.com>
Tue, 12 Feb 2008 10:57:49 +0000 (10:57 +0000)
committer Keir Fraser <keir.fraser@citrix.com>
Tue, 12 Feb 2008 10:57:49 +0000 (10:57 +0000)
Make qemu unblock SIGHUP and make sure the default handler is in
place. Have the domain killer send SIGHUP to the device-model script,
allow the script 10s to clean up, and if still not dead, send
SIGKILL.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
tools/ioemu/vl.c
tools/python/xen/xend/image.py

index 01dfef855528686229976c180ba222b99bd577b0..452bfcc5551a313a3e4c1edd9c37c4c8a3f6d221 100644 (file)
@@ -7928,11 +7928,13 @@ int main(int argc, char **argv)
     }
 #endif
 
-    /* Unblock SIGTERM, which may have been blocked by the caller */
+    /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */
+    signal(SIGHUP, SIG_DFL);
     sigemptyset(&set);
     sigaddset(&set, SIGTERM);
+    sigaddset(&set, SIGHUP);
     if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
-        fprintf(stderr, "Failed to unblock SIGTERM\n");
+        fprintf(stderr, "Failed to unblock SIGTERM and SIGHUP\n");
 
     main_loop();
     quit_timers();
index 4ceb36194f40d052ae1cc2ee14a04461305be354..069cde7dbee72fbb86d38e6647ccf51cf3e65d14 100644 (file)
@@ -335,16 +335,27 @@ class ImageHandler:
             return
         if self.pid:
             try:
-                os.kill(self.pid, signal.SIGKILL)
+                os.kill(self.pid, signal.SIGHUP)
             except OSError, exn:
                 log.exception(exn)
             try:
-                os.waitpid(self.pid, 0)
+                # Try to reap the child every 100ms for 10s. Then SIGKILL it.
+                for i in xrange(100):
+                    (p, rv) = os.waitpid(self.pid, os.WNOHANG)
+                    if p == self.pid:
+                        break
+                    time.sleep(0.1)
+                else:
+                    log.warning("DeviceModel %d took more than 10s "
+                                "to terminate: sending SIGKILL" % self.pid)
+                    os.kill(self.pid, signal.SIGKILL)
+                    os.waitpid(self.pid, 0)
             except OSError, exn:
                 # This is expected if Xend has been restarted within the
                 # life of this domain.  In this case, we can kill the process,
                 # but we can't wait for it because it's not our child.
-                pass
+                # We just make really sure it's going away (SIGKILL) first.
+                os.kill(self.pid, signal.SIGKILL)
             self.pid = None
             state = xstransact.Remove("/local/domain/0/device-model/%i"
                                       % self.vm.getDomid())